/*
HOSPITAL CORPORATIZATION

THIS DO FILE RUNS D-D REGS OF SYSTEM OWNERSHIP ON HEALTH OUTCOMES AND PATIENT RISK.

DEC 24, 2023:
PREPARE FOR REPLICATION PACKAGE;
*/

**PREAMBLE**

clear all
set more off
global datapath "<folder containing project data files>"

*Define vector of controls;
local patc_hist male ageatadmsn histelsum hist1y drgwt

local hospc bdtot teach mcare mcaid white college unempl poverty elderly state_exp_status rural

*list outcomes;
local healthout readm90d mort90d  
*Table 6 Panel B cols 1 and 2

local riskout drgwt ageatadmsn los elixsum
*Table A3 Panel D cols 1-4

*set threshold num of obs to retain hospital;
local thres 5

*set weight local 
local wt 

di "define matrices to store dd coeffs for all specs"
di "Main result matrix"
matrix define healthres = J(5,2,.)
matrix colnames healthres = `healthout'
matrix rownames healthres = est se bl mean N

di "define matrices to store event study coeffs;"
matrix define b_es = J(8,6,0)

di "Risk matrix"
matrix define riskres = J(5,4,.)
matrix colnames riskres = `riskout'
matrix rownames riskres = est se bl mean N

*Run master program file;
qui do "$codepath/0_prog_master_vfinal"

*------------------------------------------------------------------------------;

cd "$datapath"
use FULLsample_for_atulis, clear
append using FULLsample_for_atulss

ren num_prvdr_num provider 
sort provider year 

keep provider year bdtot - state_exp_status 

drop if year==2018

tostring provider, force replace 
replace provider = "0"+provider if length(provider)==5
replace provider = "00" + provider if length(provider)==4
summ
tempfile hospcontrols 
save `hospcontrols', replace 

use provider year bought_* never_* using "$datapath/ip_enr_hist_readm_sample", clear
bys provider year: keep if _n==1
drop if year<2009

*create flags for the two samples of interest;
gen is_sample = (never_is==1 | bought_1is==1)
gen ss_sample = (never_ss==1 | bought_1ss==1)

tempfile hospshell
save `hospshell', replace 

merge 1:1 provider year using `hospcontrols' 

tab year _m 
*_m==1 for years 2010 and 2011 since hospcontrols covers 2012-17. _m==2 since the patient sample only has matched treated and control hospitals, not all hospitals.

drop if _m==2
*we are not interested in unmatched hospitals.

bys provider: egen ind_merge = max(_m)
drop _m 

sort provider year 
foreach var of varlist bdtot teach rural mcare mcaid white college unempl poverty elderly state_exp_status{
	di "`var'"
	local cnt=1
	while `cnt'>0{
		by provider: replace `var' = `var'[_n+1] if `var'==.
		
		by provider: replace `var' = `var'[_n-1] if `var'==.

		count if `var'==. & ind_merge==3		
		local cnt=r(N)
	}
	replace `var'=0 if ind_merge==1
	
}

*reset medicaid expansion to zero until 2011, since the early expansions began after that.
replace state_exp_status=0 if year<2011

gen miss_hcont = ind_merge==1
drop ind_merge

save `hospshell', replace

use "$datapath/ip_enr_hist_readm_sample", clear
drop frstyr lastyr
drop if year<2009
 
di "retain patients enrolled in ffs for a year prior to admission"
keep if enr==1

di "keep only those admitted through ed for non-deferrable admission"
keep if ed==1 & i_nondef==1

di "since we are tracking up to 90d mortality/readmission, last admission cannot be later than sept 30, 2017"
keep if admsn_dt<td(1oct2017)

*create flags for the two samples of interest;
gen is_sample = (never_is==1 | bought_1is==1)
gen ss_sample = (never_ss==1 | bought_1ss==1)

di "Drop hospitals with <25 patients"
bys provider: gen hospobs = _N
preserve 
	gen one=1
	gcollapse (sum) tot=one (mean) is_sample ss_sample bought_1is bought_1ss, by(provider year)
	gen small=tot<`thres'
	bys provider: egen ever_small = max(small)
	by provider: keep if _n==1
	tab ever_small bought_1is if is_sample
	tab ever_small bought_1ss if ss_sample
	
	tempfile sizefile 
	save `sizefile', replace 
	 
restore

merge m:1 provider using `sizefile', keepusing(ever_small)
drop if _m==2
drop _m 

di "Drop hospitals which don't have atleast `thres' patients in each year"
drop if ever_small==1

*di "drop patients younger than age 65"
drop if ageatadmsn<66

di "drop patients with prior admission in last 90 days"
drop if hist90d==1

gen mort90d = (final_deathdt - dschrgdt)<=90

gen male = sex=="1"
drop if sex=="0"

gen age_65_69 = inrange(ageatadmsn,65,69)
gen age_70_74 = inrange(ageatadmsn,70,74)
gen age_75_79 = inrange(ageatadmsn,75,79)
gen age_80_84 = inrange(ageatadmsn,80,84)
gen age_85_89 = inrange(ageatadmsn,85,89)
gen age_m90 = ageatadmsn>89 & ageatadmsn!=.

di "how does mean readmission rate change with patient severity?"
gen elixcats = 1 if elixsum<=1
replace elixcats =elixsum if inrange(elixsum,2,5)
replace elixcats = 6 if elixsum>=6

tab elixcats, summ(readm90d)

bys provider: egen firstyr = min(year)

di "Bring in hospital covariates"
merge m:1 provider year using `hospshell'
drop if _m==2
drop _m 

*check if we have hosp Xs for each year;
foreach var of local hospc{
	tab year, summ(`var')
}

gen los = dschrgdt - admsn_dt +1
drop if drgwt==. 

*SAVE FINAL FILE;
save "$datapath/regfile", replace

*--------------------------------------------------------------------------------;
local samples is ss
foreach sample of local samples{
		
	di "Sample `sample'"
	di "****************************"

	use "$datapath/regfile", clear 
	
	di "limit to sample of interest"
	keep if `sample'_sample==1
	
	di "Drop unmatched hospitals"
	drop if wt_cemm_`sample'==0
	
	*create variables to run d-d regs;
	gen post = year >= conv_year if bought_1`sample'
	replace post=0 if never_`sample'==1 

	gen time = year - conv_year if  bought_1`sample' 

	replace time=0 if never_`sample'==1

	preserve 
	
	gcollapse (mean) time bought_1`sample', by(provider year)
	
	tab time bought_1`sample'
	
	restore 
	
	di "keep only (-4,3) for treated units"
	drop if time < -4 | time>3
	
	*--------------------------------------------------------------------------;
	*Health outcomes - Table 6 Panel B cols 1 and 2;
	local col=1
	foreach out of local healthout{

		if "`out'"=="mort90d"{
			local outlab "90-day mortality"
		}
		if "`out'"=="readm90d"{
			local outlab "90-day readmissions"
		}
			
		reghdfe `out' post `patc_hist' `hospc'  `wt', absorb(provider year) vce(cluster provider)
		
		*for mortality and readmission include all patient covariates; 
		
		matrix healthres[1,`col'] = round(_b[post],0.0001)	
		matrix healthres[2,`col'] = round(_se[post],0.0001)
		
		qui summ `out' if e(sample)
		matrix healthres[4,`col'] = round(r(mean),0.0001)
		matrix healthres[5,`col'] = round(r(N))
				
		*Event study;
		reghdfe `out' `patc_hist' `hospc' time_1-time_3 time_5-time_8 `wt', absorb(provider year) vce(cluster provider)
		
		di "test pretrend coefs"
		test time_1 =time_2 = time_3=0
		
		local colb = 3*(`col'-1)+1
		local colcil = 3*(`col'-1)+2
		local colciu = 3*`col'
		forval i=1/8{
				
			if `i'!=4{
				
				*local j=`i'-1
				mat b_es[`i',`colb'] = round(_b[time_`i'],0.0001)
				mat b_es[`i',`colcil'] = round((_b[time_`i'] - 1.96*_se[time_`i']),0.0001)
				mat b_es[`i',`colciu'] = round((_b[time_`i'] + 1.96*_se[time_`i']),0.0001)
			}					
			
			coefplot (matrix(b_es[,`colb']),  msym(O) msize(medlarge) ci((b_es[,`colcil'] b_es[,`colciu'])) ciopts(lc(navy))), title("`outlab'") yline(0) vertical omitted graphregion(color(white)) yla(-0.01(0.005)0.015, angle(0)) coeflabels(r1="-4" r2="-3" r3="-2" r4="-1" r5="0" r6="1" r7="2" r8="3") ytitle("Estimate") yla(,angle(0)) xtitle("Year since conversion")
			graph export "$datapath/plots/es_`out'_`sample'_pooled.png", as(png) replace
		
		}
		*event study loop ends;
		
		local col = `col'+1
	}
	*outcome loop ends;
		
	di "health outcome results"
	mat list healthres

	di "list event study coeffs"
	mat list b_es
	
	*--------------------------------------------------------------------------;
	*Table A3 Panel D outcomes;
	local col=1
	foreach out of local riskout{
			
		if "`out'"=="drgwt"{			
			reghdfe `out' post `hospc' `wt', absorb(provider year) vce(cluster provider)			
		}
		*for drgwt cannot include other patient covariates in the model; 
		
		else if inlist("`out'","ageatadmsn","elixsum","los","hist1y"){			
			reghdfe `out' post drgwt `hospc' `wt', absorb(provider year) vce(cluster provider)			
		}
		*for non-drgwt risk metrics, include drgwt as a covariate so we check whether risk is changing conditional on DRG weight, which is how we test for change in price and health outcomes; 
		
		matrix riskres[1,`col'] = round(_b[post],0.0001)	
		matrix riskres[2,`col'] = round(_se[post],0.0001)
		
		qui summ `out' if e(sample)
		matrix riskres[4,`col'] = round(r(mean),0.0001)
		matrix riskres[5,`col'] = round(r(N))
		
		local col = `col'+1
	}
	*outcome loop ends;
	
	di "patient risk results"
	mat list riskres
}
*sample type ends;

*END CODE;
